home *** CD-ROM | disk | FTP | other *** search
/ Chip 2007 January, February, March & April / Chip-Cover-CD-2007-02.iso / Pakiet bezpieczenstwa / mini Pentoo LiveCD 2006.1 / mpentoo-2006.1.iso / livecd.squashfs / usr / lib / python2.4 / email / FeedParser.pyo (.txt) < prev    next >
Python Compiled Bytecode  |  2005-10-18  |  11KB  |  445 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyo (Python 2.4)
  3.  
  4. """FeedParser - An email feed parser.
  5.  
  6. The feed parser implements an interface for incrementally parsing an email
  7. message, line by line.  This has advantages for certain applications, such as
  8. those reading email messages off a socket.
  9.  
  10. FeedParser.feed() is the primary interface for pushing new data into the
  11. parser.  It returns when there's nothing more it can do with the available
  12. data.  When you have no more data to push into the parser, call .close().
  13. This completes the parsing and returns the root message object.
  14.  
  15. The other advantage of this parser is that it will never throw a parsing
  16. exception.  Instead, when it finds something unexpected, it adds a 'defect' to
  17. the current message.  Defects are just instances that live on the message
  18. object's .defects attribute.
  19. """
  20. import re
  21. from email import Errors
  22. from email import Message
  23. NLCRE = re.compile('\r\n|\r|\n')
  24. NLCRE_bol = re.compile('(\r\n|\r|\n)')
  25. NLCRE_eol = re.compile('(\r\n|\r|\n)$')
  26. NLCRE_crack = re.compile('(\r\n|\r|\n)')
  27. headerRE = re.compile('^(From |[\\041-\\071\\073-\\176]{2,}:|[\\t ])')
  28. EMPTYSTRING = ''
  29. NL = '\n'
  30. NeedMoreData = object()
  31.  
  32. class BufferedSubFile(object):
  33.     '''A file-ish object that can have new data loaded into it.
  34.  
  35.     You can also push and pop line-matching predicates onto a stack.  When the
  36.     current predicate matches the current line, a false EOF response
  37.     (i.e. empty string) is returned instead.  This lets the parser adhere to a
  38.     simple abstraction -- it parses until EOF closes the current message.
  39.     '''
  40.     
  41.     def __init__(self):
  42.         self._partial = ''
  43.         self._lines = []
  44.         self._eofstack = []
  45.         self._closed = False
  46.  
  47.     
  48.     def push_eof_matcher(self, pred):
  49.         self._eofstack.append(pred)
  50.  
  51.     
  52.     def pop_eof_matcher(self):
  53.         return self._eofstack.pop()
  54.  
  55.     
  56.     def close(self):
  57.         self._lines.append(self._partial)
  58.         self._partial = ''
  59.         self._closed = True
  60.  
  61.     
  62.     def readline(self):
  63.         if not self._lines:
  64.             if self._closed:
  65.                 return ''
  66.             
  67.             return NeedMoreData
  68.         
  69.         line = self._lines.pop()
  70.         for ateof in self._eofstack[::-1]:
  71.             if ateof(line):
  72.                 self._lines.append(line)
  73.                 return ''
  74.                 continue
  75.         
  76.         return line
  77.  
  78.     
  79.     def unreadline(self, line):
  80.         self._lines.append(line)
  81.  
  82.     
  83.     def push(self, data):
  84.         '''Push some new data into this object.'''
  85.         data = self._partial + data
  86.         self._partial = ''
  87.         parts = NLCRE_crack.split(data)
  88.         self._partial = parts.pop()
  89.         lines = []
  90.         for i in range(len(parts) // 2):
  91.             lines.append(parts[i * 2] + parts[i * 2 + 1])
  92.         
  93.         self.pushlines(lines)
  94.  
  95.     
  96.     def pushlines(self, lines):
  97.         self._lines[:0] = lines[::-1]
  98.  
  99.     
  100.     def is_closed(self):
  101.         return self._closed
  102.  
  103.     
  104.     def __iter__(self):
  105.         return self
  106.  
  107.     
  108.     def next(self):
  109.         line = self.readline()
  110.         if line == '':
  111.             raise StopIteration
  112.         
  113.         return line
  114.  
  115.  
  116.  
  117. class FeedParser:
  118.     '''A feed-style parser of email.'''
  119.     
  120.     def __init__(self, _factory = Message.Message):
  121.         '''_factory is called with no arguments to create a new message obj'''
  122.         self._factory = _factory
  123.         self._input = BufferedSubFile()
  124.         self._msgstack = []
  125.         self._parse = self._parsegen().next
  126.         self._cur = None
  127.         self._last = None
  128.         self._headersonly = False
  129.  
  130.     
  131.     def _set_headersonly(self):
  132.         self._headersonly = True
  133.  
  134.     
  135.     def feed(self, data):
  136.         '''Push more data into the parser.'''
  137.         self._input.push(data)
  138.         self._call_parse()
  139.  
  140.     
  141.     def _call_parse(self):
  142.         
  143.         try:
  144.             self._parse()
  145.         except StopIteration:
  146.             pass
  147.  
  148.  
  149.     
  150.     def close(self):
  151.         '''Parse all remaining data and return the root message object.'''
  152.         self._input.close()
  153.         self._call_parse()
  154.         root = self._pop_message()
  155.         if root.get_content_maintype() == 'multipart' and not root.is_multipart():
  156.             root.defects.append(Errors.MultipartInvariantViolationDefect())
  157.         
  158.         return root
  159.  
  160.     
  161.     def _new_message(self):
  162.         msg = self._factory()
  163.         if self._cur and self._cur.get_content_type() == 'multipart/digest':
  164.             msg.set_default_type('message/rfc822')
  165.         
  166.         if self._msgstack:
  167.             self._msgstack[-1].attach(msg)
  168.         
  169.         self._msgstack.append(msg)
  170.         self._cur = msg
  171.         self._last = msg
  172.  
  173.     
  174.     def _pop_message(self):
  175.         retval = self._msgstack.pop()
  176.         if self._msgstack:
  177.             self._cur = self._msgstack[-1]
  178.         else:
  179.             self._cur = None
  180.         return retval
  181.  
  182.     
  183.     def _parsegen(self):
  184.         self._new_message()
  185.         headers = []
  186.         for line in self._input:
  187.             if line is NeedMoreData:
  188.                 yield NeedMoreData
  189.                 continue
  190.             
  191.             if not headerRE.match(line):
  192.                 if not NLCRE.match(line):
  193.                     self._input.unreadline(line)
  194.                 
  195.                 break
  196.             
  197.             headers.append(line)
  198.         
  199.         self._parse_headers(headers)
  200.         if self._headersonly:
  201.             lines = []
  202.             while True:
  203.                 line = self._input.readline()
  204.                 if line is NeedMoreData:
  205.                     yield NeedMoreData
  206.                     continue
  207.                 
  208.                 if line == '':
  209.                     break
  210.                 
  211.                 lines.append(line)
  212.             self._cur.set_payload(EMPTYSTRING.join(lines))
  213.             return None
  214.         
  215.         if self._cur.get_content_type() == 'message/delivery-status':
  216.             while True:
  217.                 self._input.push_eof_matcher(NLCRE.match)
  218.                 for retval in self._parsegen():
  219.                     if retval is NeedMoreData:
  220.                         yield NeedMoreData
  221.                         continue
  222.                     
  223.                     break
  224.                 
  225.                 msg = self._pop_message()
  226.                 self._input.pop_eof_matcher()
  227.                 while True:
  228.                     line = self._input.readline()
  229.                     if line is NeedMoreData:
  230.                         yield NeedMoreData
  231.                         continue
  232.                     
  233.                     break
  234.                 while True:
  235.                     line = self._input.readline()
  236.                     if line is NeedMoreData:
  237.                         yield NeedMoreData
  238.                         continue
  239.                     
  240.                     break
  241.                 if line == '':
  242.                     break
  243.                 
  244.                 self._input.unreadline(line)
  245.             return None
  246.         
  247.         if self._cur.get_content_maintype() == 'message':
  248.             for retval in self._parsegen():
  249.                 if retval is NeedMoreData:
  250.                     yield NeedMoreData
  251.                     continue
  252.                 
  253.                 break
  254.             
  255.             self._pop_message()
  256.             return None
  257.         
  258.         if self._cur.get_content_maintype() == 'multipart':
  259.             boundary = self._cur.get_boundary()
  260.             if boundary is None:
  261.                 self._cur.defects.append(Errors.NoBoundaryInMultipartDefect())
  262.                 lines = []
  263.                 for line in self._input:
  264.                     if line is NeedMoreData:
  265.                         yield NeedMoreData
  266.                         continue
  267.                     
  268.                     lines.append(line)
  269.                 
  270.                 self._cur.set_payload(EMPTYSTRING.join(lines))
  271.                 return None
  272.             
  273.             separator = '--' + boundary
  274.             boundaryre = re.compile('(?P<sep>' + re.escape(separator) + ')(?P<end>--)?(?P<ws>[ \\t]*)(?P<linesep>\\r\\n|\\r|\\n)?$')
  275.             capturing_preamble = True
  276.             preamble = []
  277.             linesep = False
  278.             while True:
  279.                 line = self._input.readline()
  280.                 if line is NeedMoreData:
  281.                     yield NeedMoreData
  282.                     continue
  283.                 
  284.                 if line == '':
  285.                     break
  286.                 
  287.                 mo = boundaryre.match(line)
  288.                 if mo:
  289.                     if mo.group('end'):
  290.                         linesep = mo.group('linesep')
  291.                         break
  292.                     
  293.                     if capturing_preamble:
  294.                         if preamble:
  295.                             lastline = preamble[-1]
  296.                             eolmo = NLCRE_eol.search(lastline)
  297.                             if eolmo:
  298.                                 preamble[-1] = lastline[:-len(eolmo.group(0))]
  299.                             
  300.                             self._cur.preamble = EMPTYSTRING.join(preamble)
  301.                         
  302.                         capturing_preamble = False
  303.                         self._input.unreadline(line)
  304.                         continue
  305.                     
  306.                     while True:
  307.                         line = self._input.readline()
  308.                         if line is NeedMoreData:
  309.                             yield NeedMoreData
  310.                             continue
  311.                         
  312.                         mo = boundaryre.match(line)
  313.                         if not mo:
  314.                             self._input.unreadline(line)
  315.                             break
  316.                             continue
  317.                     self._input.push_eof_matcher(boundaryre.match)
  318.                     for retval in self._parsegen():
  319.                         if retval is NeedMoreData:
  320.                             yield NeedMoreData
  321.                             continue
  322.                         
  323.                         break
  324.                     
  325.                     if self._last.get_content_maintype() == 'multipart':
  326.                         epilogue = self._last.epilogue
  327.                         if epilogue == '':
  328.                             self._last.epilogue = None
  329.                         elif epilogue is not None:
  330.                             mo = NLCRE_eol.search(epilogue)
  331.                             if mo:
  332.                                 end = len(mo.group(0))
  333.                                 self._last.epilogue = epilogue[:-end]
  334.                             
  335.                         
  336.                     else:
  337.                         payload = self._last.get_payload()
  338.                         if isinstance(payload, basestring):
  339.                             mo = NLCRE_eol.search(payload)
  340.                             if mo:
  341.                                 payload = payload[:-len(mo.group(0))]
  342.                                 self._last.set_payload(payload)
  343.                             
  344.                         
  345.                     self._input.pop_eof_matcher()
  346.                     self._pop_message()
  347.                     self._last = self._cur
  348.                     continue
  349.                 preamble.append(line)
  350.             if capturing_preamble:
  351.                 self._cur.defects.append(Errors.StartBoundaryNotFoundDefect())
  352.                 self._cur.set_payload(EMPTYSTRING.join(preamble))
  353.                 epilogue = []
  354.                 for line in self._input:
  355.                     if line is NeedMoreData:
  356.                         yield NeedMoreData
  357.                         continue
  358.                         continue
  359.                 
  360.                 self._cur.epilogue = EMPTYSTRING.join(epilogue)
  361.                 return None
  362.             
  363.             if linesep:
  364.                 epilogue = [
  365.                     '']
  366.             else:
  367.                 epilogue = []
  368.             for line in self._input:
  369.                 if line is NeedMoreData:
  370.                     yield NeedMoreData
  371.                     continue
  372.                 
  373.                 epilogue.append(line)
  374.             
  375.             if epilogue:
  376.                 firstline = epilogue[0]
  377.                 bolmo = NLCRE_bol.match(firstline)
  378.                 if bolmo:
  379.                     epilogue[0] = firstline[len(bolmo.group(0)):]
  380.                 
  381.             
  382.             self._cur.epilogue = EMPTYSTRING.join(epilogue)
  383.             return None
  384.         
  385.         lines = []
  386.         for line in self._input:
  387.             if line is NeedMoreData:
  388.                 yield NeedMoreData
  389.                 continue
  390.             
  391.             lines.append(line)
  392.         
  393.         self._cur.set_payload(EMPTYSTRING.join(lines))
  394.  
  395.     
  396.     def _parse_headers(self, lines):
  397.         lastheader = ''
  398.         lastvalue = []
  399.         for lineno, line in enumerate(lines):
  400.             if line[0] in ' \t':
  401.                 if not lastheader:
  402.                     defect = Errors.FirstHeaderLineIsContinuationDefect(line)
  403.                     self._cur.defects.append(defect)
  404.                     continue
  405.                 
  406.                 lastvalue.append(line)
  407.                 continue
  408.             
  409.             if lastheader:
  410.                 lhdr = EMPTYSTRING.join(lastvalue)[:-1].rstrip('\r\n')
  411.                 self._cur[lastheader] = lhdr
  412.                 lastheader = ''
  413.                 lastvalue = []
  414.             
  415.             if line.startswith('From '):
  416.                 if lineno == 0:
  417.                     mo = NLCRE_eol.search(line)
  418.                     if mo:
  419.                         line = line[:-len(mo.group(0))]
  420.                     
  421.                     self._cur.set_unixfrom(line)
  422.                     continue
  423.                 elif lineno == len(lines) - 1:
  424.                     self._input.unreadline(line)
  425.                     return None
  426.                 else:
  427.                     defect = Errors.MisplacedEnvelopeHeaderDefect(line)
  428.                     self._cur.defects.append(defect)
  429.             
  430.             i = line.find(':')
  431.             if i < 0:
  432.                 defect = Errors.MalformedHeaderDefect(line)
  433.                 self._cur.defects.append(defect)
  434.                 continue
  435.             
  436.             lastheader = line[:i]
  437.             lastvalue = [
  438.                 line[i + 1:].lstrip()]
  439.         
  440.         if lastheader:
  441.             self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n')
  442.         
  443.  
  444.  
  445.